#!/usr/bin/env expect
############################################################################
# Purpose: scontrol --local, --sibling options
#          scancel --sibling=<jobid>
#
# Reqs:    1. Using slurmdbd accounting storage type and is up
#          2. fed_slurm_base is defined in globals.local - set to directory that
#          has access to each federation configure (fedc1, fedc2, fedc3).
#          Eg.
#          fedr/slurm/ (src)
#          fedr/fed1/bin
#          fedr/fed1/sbin
#          fedr/fed1/etc
#          fedr/fed1/...
#          fedr/fed2/...
#          fedr/fed3/...
#          3. controllers are up and running.
############################################################################
# Copyright (C) 2017 SchedMD LLC.
# Written by Isaac Hartung <ihartung@schedmd.com>
#
# This file is part of Slurm, a resource management program.
# For details, see <https://slurm.schedmd.com/>.
# Please also read the included file: DISCLAIMER.
#
# Slurm is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
# details.
#
# You should have received a copy of the GNU General Public License along
# with Slurm; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA.
############################################################################

source ./globals
source ./globals_accounting
source ./globals_federation

# Federation used by this test and per-run state.
set fed_name     "feda"
set user_name    ""
set srun_job_cnt 0

# Client commands from the bin directory of the fedc1 install tree
# (my_scontrol, my_sacctmgr, my_sbatch, my_srun, my_salloc, my_squeue).
foreach fed_tool {scontrol sacctmgr sbatch srun salloc squeue} {
	set my_$fed_tool "${fed_slurm_base}/$fedc1/bin/$fed_tool"
}
unset fed_tool

# Numeric part of the configured MinJobAge, plus 65.
regexp "($number)" [get_config_param "MinJobAge"] {} min_job_age
set min_job_age [expr {$min_job_age + 65}]


#
# Check accounting config and bail if not found.
#
# The test is skipped unless accounting is stored through slurmdbd.
if {[get_config_param "AccountingStorageType"] ne "accounting_storage/slurmdbd"} {
	skip "This test can't be run without a usable AccountingStorageType"
}

# The test is skipped unless the caller has the Administrator admin level.
if {[get_admin_level] ne "Administrator"} {
	skip "This test can't be run without being an Accounting administrator. Use: sacctmgr mod user \$USER set admin=admin"
}


# Run "scontrol <option> show job <job_id>" with the fedc1 scontrol binary and
# require that its output matches a regular expression.
#
# option - flag placed before "show job"; callers in this file pass --local
#          or --sibling
# job_id - id of the job to show
# regex  - pattern that the command output has to match
# args   - extra arguments, only used when option is --local, where they are
#          appended after -a (callers pass -M<cluster>)
#
# Calls fail when scontrol times out or when the pattern was not matched.
proc scontrol { option job_id regex args } {
	global my_scontrol

	set matches 0

	set command "$my_scontrol $option show job $job_id"
	# Exact comparison; string match would use the option as a glob pattern.
	if {$option eq "--local"} {
		append command " -a"
		append command " $args"
	}

	spawn {*}$command
	expect {
		-re "$regex" {
			incr matches
		}
		timeout {
			fail "scontrol not responding"
		}
		eof {
			wait
		}
	}
	if {$matches != 1} {
		fail "Didn't match regex ($regex: $matches)"
	}

}

# Submit one batch job to all three federation clusters and return its job id.
#
# The job wraps "sleep 35", asks for node_count nodes with --exclusive and
# --requeue, and discards its output. The id is taken from the
# "Submitted batch job" line printed by sbatch.
#
# Calls fail when sbatch times out or does not report a submitted job.
# Returns the job id after pausing for two seconds.
proc sbatch { } {
	global number node_count my_sbatch
	global fedc1 fedc2 fedc3

	# The embedded quotes keep "sleep 35" a single word when the command
	# string is expanded for spawn.
	set script "\"sleep 35\""
	set command "$my_sbatch -N$node_count --exclusive --output=/dev/null \
		--error=/dev/null -t300 --requeue --wrap $script \
		-M$fedc1,$fedc2,$fedc3"
	set regex "Submitted batch job ($number)"

	set matches 0
	set job_id 0

	spawn {*}$command
	expect {
		-re "$regex" {
			incr matches
			set job_id $expect_out(1,string)
		}
		timeout {
			fail "sbatch not responding"
		}
		eof {
			wait
		}
	}
	if {$matches != 1} {
		fail "Batch submit failure"
	}

	# Expect's own sleep, as in cancel_federation_jobs; this does not depend
	# on the value of bin_sleep being usable as a Tcl command name.
	sleep 2

	return $job_id
}

# Run the fedc1 squeue binary for one job with --local and the compact state
# format, and require that its output matches a regular expression.
#
# job_id - id of the job to query
# regex  - pattern that the squeue output has to match
# args   - optional cluster passed to -M; without it all three federation
#          clusters are queried
#
# Calls fail when squeue times out or when the pattern was not matched.
proc squeue { job_id regex args } {
	global my_squeue
	global fedc1 fedc2 fedc3

	set matches 0
	set command "$my_squeue --noheader -a --local -Ostatecompact:.5i\
		-j $job_id"

	if {$args ne ""} {
		append command " -M$args"
	} else {
		append command " -M$fedc1,$fedc2,$fedc3"
	}
	spawn {*}$command
	expect {
		-re "$regex" {
			incr matches
		}
		timeout {
			fail "squeue not responding"
		}
		eof {
			wait
		}
	}
	if {$matches != 1} {
		fail "Unexpected error in squeue (expected $regex)"
	}
}

# Check what each federation cluster reports for a job, given the cluster
# named in output.
#
# job_id - id of the job to check
# output - cluster name; callers pass the result of wait_for_fed_job
#
# The named cluster must show the job as R. fedc1 must otherwise show it as RV
# and still know the job; every remaining cluster must answer
# "Invalid job id specified" to both squeue and scontrol --local.
#
# Calls fail when output names none of the three clusters, so that a job
# which was never located is not silently accepted.
# NOTE(review): assumes wait_for_fed_job returns something other than a
# cluster name when it gives up - confirm against globals_federation.
proc verify { job_id output } {
	global fedc1 fedc2 fedc3

	set invalid "slurm_load_jobs error: Invalid job id specified"

	# Exact comparisons; string match would use output as a glob pattern.
	if {$output eq $fedc1} {
		squeue $job_id "CLUSTER: $fedc1\\s+R\\s+CLUSTER: $fedc2\\s+$invalid\\s+CLUSTER: $fedc3\\s+$invalid\\s+"
		scontrol --local $job_id "JobId.+" -M$fedc1
		scontrol --local $job_id $invalid -M$fedc2
		scontrol --local $job_id $invalid -M$fedc3
	} elseif {$output eq $fedc2} {
		squeue $job_id "CLUSTER: $fedc1\\s+RV\\s+CLUSTER: $fedc2\\s+R\\s+CLUSTER: $fedc3\\s+$invalid\\s+"
		scontrol --local $job_id "JobId.+" -M$fedc1
		scontrol --local $job_id $invalid -M$fedc3
		scontrol --local $job_id "JobId.+" -M$fedc2
	} elseif {$output eq $fedc3} {
		squeue $job_id "CLUSTER: $fedc1\\s+RV\\s+CLUSTER: $fedc2\\s+$invalid\\s+CLUSTER: $fedc3\\s+R\\s+"
		scontrol --local $job_id "JobId.+" -M$fedc1
		scontrol --local $job_id $invalid -M$fedc2
		scontrol --local $job_id "JobId.+" -M$fedc3
	} else {
		fail "Job $job_id is not running on $fedc1, $fedc2 or $fedc3 (got \"$output\")"
	}

}

# Run "scancel --sibling=<sibling> <job_id>" with the fedc1 scancel binary and
# require that its output matches a regular expression.
#
# sibling - cluster name given to --sibling
# job_id  - id of the job to cancel
# regex   - pattern that the scancel output has to match
#
# Calls fail when scancel times out or when the pattern was not matched.
proc scancel_mod { sibling job_id regex} {
	global fed_slurm_base fedc1

	set my_scancel "${fed_slurm_base}/$fedc1/bin/scancel"
	set seen 0

	spawn $my_scancel --sibling=$sibling $job_id
	expect {
		-re "$regex" {
			set seen 1
		}
		timeout {
			fail "scancel not responding"
		}
		eof {
			wait
		}
	}

	if {!$seen} {
		fail "scancel failure (expected $regex)"
	}
}

# Cancel every job of the test user on all three federation clusters, then
# pause for five seconds.
proc cancel_federation_jobs { } {
	global scancel user_name fedc1 fedc2 fedc3

	set all_clusters "$fedc1,$fedc2,$fedc3"

	spawn $scancel -M$all_clusters --user $user_name
	expect {
		eof { wait }
	}

	sleep 5
}

# Undo the test setup: cancel the user's federation jobs, delete the test
# federation, and remove the test's *.out files.
proc cleanup { } {
	global fed_name bin_bash bin_rm test_name

	cancel_federation_jobs
	delete_federations $fed_name

	# The removal runs through a shell so that the wildcard is expanded.
	set rm_cmd "$bin_rm -f $test_name*.out"
	exec $bin_bash -c $rm_cmd
}

# Start test

# Skip unless the federation environment is configured and running.
if {![check_federation_setup]} {
	skip "This test can't be run without fed_slurm_base, fedc1, fedc2, fedc3 setup in globals.local"
}

if {![check_federation_up]} {
	skip "This test can't be run without all clusters up"
}

set user_name [get_my_user_name]

# Remove existing setup
cleanup

# Add clusters to federation
# The condition is braced so that the result of setup_federation is evaluated
# once instead of being substituted a second time by expr.
if {[setup_federation $fed_name]} {
	fail "Failed to setup federation"
}

# Get number of nodes per cluster
set node_count [llength [get_nodes_by_state idle,alloc,comp]]

log_info "################################################################"
log_info "Setup cluster features"
log_info "################################################################"

# Give each cluster its own feature (fedc1 fa, fedc2 fb, fedc3 fc). For every
# modification sacctmgr has to print four lines: the "Setting" header, the
# feature assignment, the "Modified cluster..." header and the cluster name.
foreach {feat_cluster feat_name} [list $fedc1 fa $fedc2 fb $fedc3 fc] {
	set matches 0
	spawn $sacctmgr -i modify cluster $feat_cluster set features=$feat_name
	expect {
		-re "Setting$eol" {
			incr matches
			exp_continue
		}
		-re "^\\s+Feature\\s+=\\s+$feat_name" {
			incr matches
			exp_continue
		}
		-re "Modified cluster...$eol" {
			incr matches
			exp_continue
		}
		-re "^\\s+$feat_cluster$eol" {
			incr matches
			exp_continue
		}
		timeout {
			fail "sacctmgr mod not responding"
		}
		eof {
			wait
		}
	}
	if {$matches != 4} {
		fail "Unexpected error (got $matches)"
	}
}

log_info "################################################################"
log_info "Test scontrol --local and --sibling"
log_info "################################################################"

# Submit three jobs, then a fourth that is expected to be pending on every
# cluster.
lassign [list [sbatch] [sbatch] [sbatch]] ji0 ji1 ji2
set job_id [sbatch]

squeue $job_id "CLUSTER: $fedc1\\s+PD\\s+CLUSTER: $fedc2\\s+PD\\s+CLUSTER: $fedc3\\s+PD\\s+"

# --sibling has to list the job three times.
scontrol --sibling $job_id "JobId.+JobId.+JobId.+"

# Wait for each of the first three jobs to run, then check what every
# cluster reports for it.
foreach running_job [list $ji0 $ji1 $ji2] {
	set output [wait_for_fed_job $running_job RUNNING $fedc1,$fedc2,$fedc3]
	verify $running_job $output
}

log_info "################################################################"
log_info "Scancel --sibling=<clustername><jobid> will remove on active pending"
log_info "################################################################"

cancel_federation_jobs

# Start again with three fresh jobs and wait until each one is running.
lassign [list [sbatch] [sbatch] [sbatch]] ji0 ji1 ji2

foreach running_job [list $ji0 $ji1 $ji2] {
	set output [wait_for_fed_job $running_job RUNNING $fedc1,$fedc2,$fedc3]
	verify $running_job $output
}

# scancel --sibling has to refuse each of the running jobs.
foreach running_job [list $ji0 $ji1 $ji2] {
	scancel_mod $fedc1 $running_job "scancel: error: Kill job error on job id ${running_job}: Job is no longer pending execution"
}

# A fourth job is expected to be pending on every cluster.
set job_id [sbatch]

set pending_re "\\s+PD\\s+"
set revoked_re "\\s+RV\\s+"
set unknown_re "slurm_load_jobs error: Invalid job id specified\\s+"

foreach fed_cluster [list $fedc1 $fedc2 $fedc3] {
	squeue $job_id $pending_re $fed_cluster
}

# Remove the sibling job from fedc1: fedc1 then reports RV, fedc2 still PD.
scancel_mod $fedc1 $job_id ""

squeue $job_id $revoked_re $fedc1
squeue $job_id $pending_re $fedc2

# Remove the sibling job from fedc2: fedc2 no longer knows the job.
scancel_mod $fedc2 $job_id ""

squeue $job_id $revoked_re $fedc1
squeue $job_id $unknown_re $fedc2
squeue $job_id $pending_re $fedc3

# Remove the sibling job from fedc3: only fedc1 still reports the job, as RV.
scancel_mod $fedc3 $job_id ""

squeue $job_id $revoked_re $fedc1
squeue $job_id $unknown_re $fedc2
squeue $job_id $unknown_re $fedc3
